/* Copyright 2012 Tobias Marschall
 *
 * This file is part of CLEVER.
 *
 * CLEVER is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CLEVER is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CLEVER.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <sstream>
#include <boost/unordered_set.hpp>

#include "VariationUtils.h"

using namespace std;

auto_ptr<vector<Variation> > VariationUtils::removeRedundant(const boost::unordered_map<string,NamedDnaSequence*>& references, const vector<Variation>& variations, bool rightmost) {
	typedef boost::unordered_map<string,NamedDnaSequence*>::const_iterator references_it_t;
	vector<Variation> sorted_list(variations);
	sort(sorted_list.begin(), sorted_list.end(), variation_sort_t());
	vector<Variation>::iterator it = sorted_list.begin();
	// process insertions
	boost::unordered_set<Variation> canonical_insertions;
	for (; it != sorted_list.end(); ++it) {
		if (it->getType() != Variation::INSERTION) break;
//		cerr << (*it) << "  ===>  ";
		it->canonify(references);
//		cerr << (*it) << endl;
		canonical_insertions.insert(*it);
	}
	auto_ptr<vector<Variation> > result(new vector<Variation>(canonical_insertions.begin(), canonical_insertions.end()));
	// process deletions
	NamedDnaSequence* current_ref = 0;
	Variation previous_variation;
	for (; it != sorted_list.end(); ++it) {
		assert(it->getType() == Variation::DELETION);
		if ((current_ref == 0) || (current_ref->getName().compare(it->getChromosome()) != 0)) {
			references_it_t ref_it = references.find(it->getChromosome());
			if (ref_it == references.end()) {
				ostringstream oss;
				oss << "Cannot find reference " << it->getChromosome();
				throw std::runtime_error(oss.str());
			}
			current_ref = ref_it->second;
			previous_variation = Variation();
		}
		bool keep_variant = false;
		if ((previous_variation.getType() == Variation::NONE) || (previous_variation.getLengthDifference() != it->getLengthDifference())) {
			keep_variant = true;
		} else {
			// coordinate within previous deletion, cyclically updated
			int i = previous_variation.getCoordinate2() - 1;
			// iterate backwards from one position left of previous deletion to begin of present deletion
			// and check whether this stretch of sequence is cyclic
			for (int j=previous_variation.getCoordinate1()-1; j>=it->getCoordinate1(); --j) {
				if ((*current_ref)[j] != (*current_ref)[i]) {
					keep_variant = true;
					break;
				}
				i -= 1;
				if (i < it->getCoordinate1()) {
					i = it->getCoordinate2() - 1;
				}
			}
		}
		if (keep_variant) {
			result->push_back(*it);
		}
//		else {
//			cerr << "Dropping deletion " << (*it) << " which is equivalent to " << previous_variation << endl;
//		}
		previous_variation = *it;
	}
	sort(result->begin(), result->end(), variation_position_sort_t());
	return result;
}
